library(estimatr)

###Merging and checking the drawn to realized sample of zones ####
# Key every Phase 2 report by a lower-cased "division.parish.zone" string.
ph2$location.id <- tolower(with(ph2, paste(Division.of.Residence,
                                           Parish.of.Residence,
                                           Zone.of.Residence,
                                           sep = ".")))

# Stack the Phase 1 zones on top of the ex.sample2 zones flagged InSample == 1.
# The location ids and the X column are taken with the same filter, so the two
# vectors stay aligned row by row.
insample.zones <- c(resp1.zones$location.id,
                    with(ex.sample2, location.id[InSample == 1]))
insample.zone.ids <- c(resp1.zones$X,
                       with(ex.sample2, X[InSample == 1]))
insample.dta <- data.frame(insample.zones = insample.zones,
                           insample.zone.ids = insample.zone.ids)

# Diagnostics: number of distinct zone names, then the names occurring more than once.
length(unique(insample.dta[["insample.zones"]]))
with(insample.dta, insample.zones[duplicated(insample.zones)])
# Phase 1 rows for the zone name inspected in the notes below.
see1 <- resp1.zones[with(resp1.zones, location.id == "nakawa.mbuya ii.zone i"), ]
#Note: there is a non-unique zone name from the Phase I sample, investigate
#Note: both of the non-unique zones are assigned to the responsiveness condition in Phase 2
#From Jacob (9/16): "We actually sent two teams to this zone on the same day and recruited 24 subjects; 1/2 subjects were recruited using random recruitment and 1/2 subjects using nomination."
#Note: see the post-compile code for the fix to this
#Note: for the purpose of the Phase I analysis, these duplicates are considered separate zones

# Reports whose zone key does not appear in the drawn sample.
check <- ph2[!(ph2$location.id %in% insample.dta$insample.zones), ]
unique(check$location.id) #Have to check why these zones appear in the data
#makindye.makindye i.kipamba: out-of-sample replacement zone; deviation from sampling protocol
#rubaga.namirembe.mengo town: out-of-sample replacement zone; deviation from sampling protocol
#makindye.katwe i.musoke: out-of-sample replacement zone; deviation from sampling protocol
o.s.r <- c("makindye.makindye i.kipamba",
           "rubaga.namirembe.mengo town",
           "makindye.katwe i.musoke")
# 1 = report comes from an out-of-sample replacement zone, 0 otherwise.
ph2$out.sample.replacement <- as.numeric(ph2$location.id %in% o.s.r)

# Recode table: names are the keys as they appear in the report data, values
# are the keys used in the sample files. No value is itself a name, so applying
# all recodes in one pass gives the same result as applying them one by one.
ph2.id.fixes <- c(
  #appears that division is incorrect in data; should be Makindye Division
  "rubaga.kabalagala.kiwafu"         = "makindye.kabalagala.kiwafu",
  #appears that division is incorrect in data; should be Nakawa Division
  "kawempe.kikaya.kisota"            = "nakawa.kikaya.kisota",
  #spelling differences; revert to the original spelling
  "rubaga.nateete.nateete central d" = "rubaga.natete.nateete central d",
  "central.kisenyi ii.kiganda"       = "central.kiseny ii.kiganda",
  "central.kisenyi ii.mengo hill"    = "central.kiseny ii.mengo hill"
)
# Keys without an entry in the table are left untouched.
ph2$location.id <- ifelse(ph2$location.id %in% names(ph2.id.fixes),
                          unname(ph2.id.fixes[ph2$location.id]),
                          ph2$location.id)

# Number of distinct zone keys that produced at least one Phase 2 report.
length(unique(ph2[["location.id"]])) #Note: not all zones generated reports in Phase 2
# Sampled zones that never show up in the report data.
with(insample.dta, insample.zones[!(insample.zones %in% ph2$location.id)])
#"rubaga.lungujja.makamba": Phase 1 recruitment; no reports
#"makindye.bukasa.namuwongo b": Phase 1 recruitment; no reports
#"makindye.kisugu.mutajazi c": Phase 1 recruitment; no reports
#"nakawa.ntinda.village 7": Phase 1 recruitment; no reports
#"central.kamwokya i.village a": Phase 1 recruitment; no reports
#"makindye.kansanga-muyenga.pepsi": Phase 2 recruitment; no reports

#"central.mengo.namirembe": Zone replaced in Phase 2 with out-of-sample replacement
#"rubaga.najjanankumbi i.kipamba": Zone replaced in Phase 2 with out-of-sample replacement
#"central.kisenyi iii.musoke": Zone replaced in Phase 2 with out-of-sample replacement
removed.z <- c("central.mengo.namirembe",
               "rubaga.najjanankumbi i.kipamba",
               "central.kisenyi iii.musoke")
# 1 = sampled zone that was replaced out-of-sample, 0 otherwise; aligned with insample.dta rows.
removed <- as.numeric(insample.dta$insample.zones %in% removed.z)

#Merging and checking the assigned and realized responsiveness treatment
# Cross-tabulate zone key against the Responsiveness value recorded on each report.
table(ph2$location.id, ph2$Responsiveness) #No cross-over of treatment assignment
ph2.tab <- as.data.frame.matrix(table(ph2$location.id, ph2$Responsiveness))
# NOTE(review): the renaming assumes the table has exactly two columns, ordered 0 then 1.
names(ph2.tab) <- c("X0", "X1")
# Zones with at least one report recorded under Responsiveness 1 / 0 respectively.
treated.list <- row.names(ph2.tab)[ph2.tab$X1 > 0]
control.list <- row.names(ph2.tab)[ph2.tab$X0 > 0]

# Phase 1 zones assigned to control: are they all observed as control in the reports?
resp1.zones$location.id[resp1.zones$responsiveness == 0] %in% control.list
# List the assigned-control zones NOT observed as control. These are computed
# from the mask rather than picked by hard-coded positions, so the listing stays
# correct if the row order of resp1.zones changes.
with(resp1.zones, {
  assigned.control <- location.id[responsiveness == 0]
  assigned.control[!(assigned.control %in% control.list)]
})
#"makindye.bukasa.namuwongo b": Phase 1 recruitment; no reports
#"makindye.kisugu.mutajazi c": Phase 1 recruitment; no reports
#"nakawa.ntinda.village 7": Phase 1 recruitment; no reports
#"central.kamwokya i.village a": Phase 1 recruitment; no reports
#Finding: no zone-wise non-compliance with treatment assignment

# Same check for the Phase 1 zones assigned to treatment.
resp1.zones$location.id[resp1.zones$responsiveness == 1] %in% treated.list
with(resp1.zones, {
  assigned.treated <- location.id[responsiveness == 1]
  assigned.treated[!(assigned.treated %in% treated.list)]
})
#"rubaga.lungujja.makamba": Phase 1 recruitment; no reports
#Finding: no zone-wise non-compliance with treatment assignment

#Looking at the data
table(ph2$Responsiveness) #Impression: large treatment effect of responsiveness

#Adding recruitment phase to ph2 reports file
# "one" when the zone key appears among the Phase 1 zones, otherwise "two".
ph2$phase <- c("two", "one")[(ph2$location.id %in% resp1.zones$location.id) + 1L] #Checked: no missing phase indicators

#Adding other treatment assignments to "ph2" object
# Zone keys per arm. The Phase 2 lists only use rows with InSample == 1.
p1.neighbor.list <- with(resp1.zones, location.id[assignment == "nomination"])
p1.random.list <- with(resp1.zones, location.id[assignment == "random"])
p2.lc1.list <- with(ex.sample2, location.id[lc1.recruit == 1 & InSample == 1])
p2.random.list <- with(ex.sample2, location.id[lc1.recruit == 0 & InSample == 1])
p2.announce_trt.list <- with(ex.sample2, location.id[lc1.announce == 1 & InSample == 1])
p2.announce_ctl.list <- with(ex.sample2, location.id[lc1.announce == 0 & InSample == 1])

# Recruitment label per report. Labels are written lowest priority first, so a
# zone found in several lists ends up as "neighbor", then "lc1", then "random";
# zones in none of the lists stay NA.
ph2$recruitment <- rep(NA_character_, nrow(ph2))
ph2$recruitment[ph2$location.id %in% p1.random.list |
                  ph2$location.id %in% p2.random.list] <- "random"
ph2$recruitment[ph2$location.id %in% p2.lc1.list] <- "lc1"
ph2$recruitment[ph2$location.id %in% p1.neighbor.list] <- "neighbor"
#table(ph2$recruitment, useNA = "always") #To Do: figure out why NA's are in the data
#see <- subset(ph2,is.na(recruitment))
#table(see$location.id) #OK; these are the location.id's for zones that were replaced out-of-sample and excluded from the analysis

# Announcement arm: 1 = treated list, 0 = control list, NA = in neither list.
# The treated list is written last so it wins if a zone is in both.
ph2$lc1.announce <- rep(NA_real_, nrow(ph2))
ph2$lc1.announce[ph2$location.id %in% p2.announce_ctl.list] <- 0
ph2$lc1.announce[ph2$location.id %in% p2.announce_trt.list] <- 1
#table(ph2$lc1.announce, useNA="always")

# Same indicator, with every Phase 1 report set to 0.
ph2$lc1.announce.w.ph1 <- ph2$lc1.announce
ph2$lc1.announce.w.ph1[ph2$phase == "one"] <- 0
#table(ph2$lc1.announce.w.ph1, useNA = "always")


###Creating Zone-Wise Dataframe for all reporting in Phase 2 ####
#To Do: include the out-of-sample replacement zones for extended analysis
# One row per sampled zone (the duplicated Phase 1 zone name appears twice),
# plus the `removed` flag built earlier.
ph2.zones <- data.frame(insample.zones, removed)
names(ph2.zones)[1] <- "location.id"
ph2.zones$phase <- ifelse(ph2.zones$location.id %in% resp1.zones$location.id, "one", "two")

# Attach the treatment assignments. Phase "one" zones read resp1.zones, phase
# "two" zones read ex.sample2. match() returns the FIRST matching row, which is
# the value the former row-by-row loop kept for the duplicated
# "nakawa.mbuya ii.zone i" entries -- but without assigning a length-2 value
# into a single cell, so no warnings are raised.
ph2.zones <- local({
  zones <- ph2.zones
  from.ph1 <- zones$phase == "one"
  ph1.row <- match(zones$location.id, resp1.zones$location.id)
  ph2.row <- match(zones$location.id, ex.sample2$location.id)

  # Surface zones that cannot be looked up instead of leaving silent NAs.
  unmatched <- ifelse(from.ph1, is.na(ph1.row), is.na(ph2.row))
  if (any(unmatched)) {
    warning("No assignment row found for zone(s): ",
            paste(zones$location.id[unmatched], collapse = ", "),
            call. = FALSE)
  }

  ph2.arm <- ifelse(ex.sample2[["lc1.recruit"]][ph2.row] == 0, "random", "lc1.recruit")
  zones$recruitment <- ifelse(from.ph1,
                              as.character(resp1.zones[["assignment"]][ph1.row]),
                              ph2.arm)
  # Phase "one" zones are always coded 0 for the announcement indicator.
  zones$lc1.announce <- ifelse(from.ph1, 0, ex.sample2[["lc1.announce"]][ph2.row])
  zones$responsiveness <- ifelse(from.ph1,
                                 resp1.zones[["responsiveness"]][ph1.row],
                                 ex.sample2[["Responsiveness"]][ph2.row])

  # Labels outside these three levels become NA.
  zones$recruitment <- factor(zones$recruitment,
                              levels = c("random", "nomination", "lc1.recruit"))
  zones
})

# Zone-level report counts, restricted to reports with Data.Usable.mb == "Yes".
# The counts are tabulated once per measure instead of re-scanning ph2 eighteen
# times per zone; the resulting integer counts match the row counts the former
# subset() calls produced (rows with an NA condition are not counted).
ph2.zones <- local({
  zones <- ph2.zones
  zone.key <- as.character(zones$location.id)
  keys <- unique(zone.key)

  # Number of entries of `report.zone` equal to each zone key, returned in the
  # row order of `zones` (duplicated zone names each receive the full count).
  count.by.zone <- function(report.zone) {
    per.key <- tabulate(match(report.zone, keys, incomparables = NA),
                        nbins = length(keys))
    per.key[match(zone.key, keys)]
  }

  usable <- ph2[which(ph2$Data.Usable.mb == "Yes"), ]

  zones$total.responses <- count.by.zone(usable$location.id)
  zones$last2weeks.responses <-
    count.by.zone(usable$location.id[which(usable$Question.Number >= 13)])

  # Distinct Subject.ID values per zone: keep the first usable report of every
  # (zone, subject) pair and count those.
  first.report <- !duplicated(usable[c("location.id", "Subject.ID")])
  zones$active.reporters <- count.by.zone(usable$location.id[first.report])

  # q1.responses ... q15.responses: usable reports per question number.
  for (q in 1:15) {
    zones[[paste0("q", q, ".responses")]] <-
      count.by.zone(usable$location.id[which(usable$Question.Number == q)])
  }
  zones
})

##Adding reporters per zone
p2.reporters <- read.csv("./ph2_reporters_baseline.csv", stringsAsFactors = FALSE) #uncleaned list

# Only the first 11 columns of the assigned-reporter list are kept.
ph2.reporters <- read.csv("./ph1ph2_reporters_list_assigned.csv", stringsAsFactors = FALSE)[, 1:11]
# Same lower-cased "division.parish.zone" key as used for the reports.
ph2.reporters$location.id <- tolower(with(ph2.reporters, paste(Division.of.Residence,
                                                               Parish.of.Residence,
                                                               Zone.of.Residence,
                                                               sep = ".")))
r.zones <- sort(unique(ph2.reporters$location.id))
r.zones[!(r.zones %in% ph2.zones$location.id)] #location.ids without a match in the zone-wise analysis file

# Recode table: names are the keys as they appear in the reporter file, values
# are the keys used in the zone-wise file. No value is itself a name, so one
# pass gives the same result as applying the recodes one after another.
reporter.id.fixes <- c(
  #spelling differences; revert to the original spelling
  "central.kisenyi ii.kiganda"       = "central.kiseny ii.kiganda",
  "central.kisenyi ii.mengo hill"    = "central.kiseny ii.mengo hill",
  #appears that division is incorrect in data; should be Nakawa Division
  "kawempe.kikaya.kisota"            = "nakawa.kikaya.kisota",
  #appears that division is incorrect in data; should be Makindye Division
  "rubaga.kabalagala.kiwafu"         = "makindye.kabalagala.kiwafu",
  #spelling difference; revert to the original spelling
  "rubaga.nateete.nateete central d" = "rubaga.natete.nateete central d",
  #division entered incorrectly in reporter file
  "rubaga.mbuya ii.zone 7"           = "nakawa.mbuya ii.zone 7"
)
# Keys without an entry in the table are left untouched.
ph2.reporters$location.id <- ifelse(ph2.reporters$location.id %in% names(reporter.id.fixes),
                                    unname(reporter.id.fixes[ph2.reporters$location.id]),
                                    ph2.reporters$location.id)

#makindye.makindye i.kipamba: out-of-sample replacement zone; deviation from sampling protocol
#rubaga.namirembe.mengo town: out-of-sample replacement zone; deviation from sampling protocol
#makindye.katwe i.musoke: out-of-sample replacement zone; deviation from sampling protocol

# Number of rows in the reporter list whose key equals each zone's key.
# vapply() builds the whole column at once (and copes with an empty zone table),
# replacing the 1:nrow() loop that grew the column element by element.
ph2.zones$total.reporters <- vapply(
  as.character(ph2.zones$location.id),
  function(zone) sum(ph2.reporters$location.id == zone, na.rm = TRUE),
  integer(1),
  USE.NAMES = FALSE
)

# Zones without any listed reporter.
check <- subset(ph2.zones, total.reporters == 0)
#Only nakawa.ntinda.village 7 has no reporters, but was not removed from the sample

ph2.zones <- subset(ph2.zones, location.id != "nakawa.ntinda.village 7")

tofix <- subset(ph2.zones, location.id == "nakawa.mbuya ii.zone i")

#Fixing dual method of recruitment in nakawa.mbuya ii.zone i
# The zone name occupies two rows; label the first "random" and the second
# "nomination". Any other row count would make the assignment recycle or
# truncate silently, so check it first.
stopifnot(sum(ph2.zones$location.id == "nakawa.mbuya ii.zone i") == 2)
ph2.zones$recruitment[ph2.zones$location.id == "nakawa.mbuya ii.zone i"] <- c("random", "nomination")

# Reporters of that zone, labelled through the Phase 1 reporter list
# (matched on mobile number): Nominated == "no" means "random", anything else
# "nomination". match() takes the first matching Phase 1 row for every reporter
# in one step instead of a 1:nrow() loop.
ph2.reporters.zonei <- local({
  zonei <- subset(ph2.reporters, location.id == "nakawa.mbuya ii.zone i")
  ph1.row <- match(zonei$Mobile.Number, ph1.reporters$mobile_number)
  if (anyNA(ph1.row)) {
    warning("Some nakawa.mbuya ii.zone i reporters are missing from ph1.reporters; ",
            "their Recruitment.Treatment is NA", call. = FALSE)
  }
  zonei$Recruitment.Treatment <- ifelse(ph1.reporters$Nominated[ph1.row] == "no",
                                        "random", "nomination")
  zonei
})

# Split the zone's reporter count between its two rows.
ph2.zones$total.reporters[ph2.zones$location.id == "nakawa.mbuya ii.zone i" & ph2.zones$recruitment == "random"] <-
  sum(ph2.reporters.zonei$Recruitment.Treatment == "random", na.rm = TRUE)
ph2.zones$total.reporters[ph2.zones$location.id == "nakawa.mbuya ii.zone i" & ph2.zones$recruitment == "nomination"] <-
  sum(ph2.reporters.zonei$Recruitment.Treatment == "nomination", na.rm = TRUE)

# Mobile numbers of each arm with "256" prepended, for comparison against
# Mobile.Number in the report data.
zi.ran <- paste0("256", ph2.reporters.zonei$Mobile.Number[ph2.reporters.zonei$Recruitment.Treatment == "random"])
zi.nom <- paste0("256", ph2.reporters.zonei$Mobile.Number[ph2.reporters.zonei$Recruitment.Treatment == "nomination"])

# Usable reports from "nakawa.mbuya ii.zone i", split by the recruitment arm of
# the sending number (zi.ran = random, zi.nom = nomination).
sub.ran <- subset(ph2, location.id == "nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.ran & Data.Usable.mb == "Yes")
sub.nom <- subset(ph2, location.id == "nakawa.mbuya ii.zone i" & Mobile.Number %in% zi.nom & Data.Usable.mb == "Yes")

# Overwrite the counts on the two rows of that zone so that each row only
# reflects the reports of its own arm.
ph2.zones <- local({
  zones <- ph2.zones
  reports.by.arm <- list(random = sub.ran, nomination = sub.nom)

  for (arm in names(reports.by.arm)) {
    reports <- reports.by.arm[[arm]]
    target <- zones$location.id == "nakawa.mbuya ii.zone i" & zones$recruitment == arm

    zones$total.responses[target] <- nrow(reports)
    zones$last2weeks.responses[target] <- sum(reports$Question.Number >= 13, na.rm = TRUE)
    # Distinct Subject.ID values among the arm's usable reports.
    zones$active.reporters[target] <- sum(!duplicated(reports$Subject.ID))

    # q1.responses ... q15.responses
    for (q in 1:15) {
      zones[[paste0("q", q, ".responses")]][target] <-
        sum(reports$Question.Number == q, na.rm = TRUE)
    }
  }
  zones
})


###Finishing Reporter-Wise Dataframe for all reporting in Phase 2 ####

# Mobile number with "256" prepended.
ph2.reporters$full_number <- paste0("256", ph2.reporters$Mobile.Number)
# Drop reporters from zones that have a Recruitment.Phase == 2 reporter but no
# row in ex.sample2.
out.of.protocol.list <- unique(ph2.reporters[!(ph2.reporters$location.id %in% ex.sample2$location.id) & ph2.reporters$Recruitment.Phase == 2, "location.id"])
ph2.reporters <- ph2.reporters[!(ph2.reporters$location.id %in% out.of.protocol.list), ]

# Recruitment arm and announcement indicator for every reporter:
#   phase 1, ordinary zone            -> zone assignment from resp1.zones
#   phase 1, "nakawa.mbuya ii.zone i" -> own Nominated flag in ph1.reporters
#   phase 2                           -> lc1.recruit / lc1.announce from ex.sample2
# Phase 1 reporters always get lc1.announce = 0.
# This is vectorized with match(), which takes the first matching row. Rows
# that fall in none of the three cases now stay NA; the former row loop left
# them holding whatever value had been recycled from the first row. A lookup
# without a match now gives NA plus a warning instead of stopping the script.
ph2.reporters <- local({
  reporters <- ph2.reporters
  in.zone.i <- reporters$location.id == "nakawa.mbuya ii.zone i"
  ph1.plain <- which(reporters$Recruitment.Phase == 1 & !in.zone.i)
  ph1.zone.i <- which(reporters$Recruitment.Phase == 1 & in.zone.i)
  ph2.rows <- which(reporters$Recruitment.Phase == 2)

  recruitment <- rep(NA_character_, nrow(reporters))
  announce <- rep(NA_real_, nrow(reporters))

  zone.row <- match(reporters$location.id[ph1.plain], resp1.zones$location.id)
  recruitment[ph1.plain] <- as.character(resp1.zones[["assignment"]][zone.row])

  person.row <- match(reporters$Mobile.Number[ph1.zone.i], ph1.reporters$mobile_number)
  recruitment[ph1.zone.i] <- ifelse(ph1.reporters$Nominated[person.row] == "no",
                                    "random", "nomination")

  announce[c(ph1.plain, ph1.zone.i)] <- 0

  sample.row <- match(reporters$location.id[ph2.rows], ex.sample2$location.id)
  recruitment[ph2.rows] <- ifelse(ex.sample2[["lc1.recruit"]][sample.row] == 0,
                                  "random", "lc1.recruit")
  announce[ph2.rows] <- ex.sample2[["lc1.announce"]][sample.row]

  if (anyNA(zone.row) || anyNA(person.row) || anyNA(sample.row)) {
    warning("Some reporters could not be matched to an assignment row; ",
            "their recruitment is NA", call. = FALSE)
  }

  reporters$recruitment <- recruitment
  reporters$lc1.announce <- announce
  reporters
})

# Labels outside these three levels become NA.
ph2.reporters$recruitment <- factor(ph2.reporters$recruitment, levels = c("random", "nomination", "lc1.recruit"))

# Usable-report counts per reporter (matched on Subject.ID): all questions, and
# questions numbered 13 or higher. The counts are tabulated in one pass instead
# of running two subset() scans of ph2 per reporter; a reporter with an NA
# Subject.ID gets 0, as before.
ph2.reporters <- local({
  reporters <- ph2.reporters
  ids <- reporters$Subject.ID
  keys <- unique(ids)

  # Number of entries of `subject.ids` equal to each reporter's id, in reporter order.
  count.by.reporter <- function(subject.ids) {
    per.key <- tabulate(match(subject.ids, keys, incomparables = NA),
                        nbins = length(keys))
    per.key[match(ids, keys)]
  }

  usable <- ph2[which(ph2$Data.Usable.mb == "Yes"), ]
  reporters$total.responses <- count.by.reporter(usable$Subject.ID)
  reporters$last2week.responses <-
    count.by.reporter(usable$Subject.ID[which(usable$Question.Number >= 13)])
  reporters
})
# 1 when the reporter sent at least one usable report, else 0.
ph2.reporters$active.ever <- ifelse(ph2.reporters$total.responses > 0, 1, 0)

table(ph2.reporters$Responsiveness, ph2.reporters$active.ever)
table(ph2.reporters$Responsiveness, ph2.reporters$total.responses)

###Adding spillover variable from spill.dta2
spill.dta2 <- read.csv("./ph2_responsiveness_contiguous.csv")
#Note: both "nakawa.mbuya ii.zone i" have contiguous zone, no need to distinguish in loop
# Copy con.test from the first spill.dta2 row with the same location.id; reporters
# whose zone is absent from the file get NA. A single match() replaces the
# per-reporter subset() scan.
ph2.reporters$con.test <-
  spill.dta2[["con.test"]][match(ph2.reporters$location.id, spill.dta2$location.id)]

###Adding probability of Responsiveness exposure
resp.exposure.prob <- read.csv("./ph2_responsiveness_spillover_probability.csv")
resp.exposure.prob$location.id <- as.character(resp.exposure.prob$location.id)
resp.exposure.prob$location.id <- ifelse(resp.exposure.prob$location.id == "nakawa.kyanja.kulambiro-kondogolo", "nakawa.kyanja.kulambiro", resp.exposure.prob$location.id) #Naming inconsistency that does not indicate non-compliance with recruitment protocol
#unique(ph2.reporters$location.id)[!(unique(ph2.reporters$location.id) %in% resp.exposure.prob$location.id)]

# Copy the exposure columns from the first resp.exposure.prob row with the same
# location.id (NA when the zone is absent). The d*i* columns are divided by
# 100000 as they are copied. NOTE(review): presumably these are counts out of
# 100,000 simulated assignments -- confirm against the file.
# A single match() replaces six subset() scans per reporter.
ph2.reporters <- local({
  reporters <- ph2.reporters
  src <- match(reporters$location.id, resp.exposure.prob$location.id)
  for (cell in c("d0i0", "d1i0", "d0i1", "d1i1")) {
    reporters[[paste0(cell, ".prob")]] <- resp.exposure.prob[[cell]][src] / 100000
  }
  reporters$direct <- resp.exposure.prob[["direct"]][src]
  reporters$indirect <- resp.exposure.prob[["indirect"]][src]
  reporters
})

#Fixing probabilities for the duplicated "nakawa.mbuya ii.zone i" zones
# The zone name covers two rows of resp.exposure.prob, told apart by zone.id:
# 695 is used for reporters recruited at random, 607 for nominated reporters.
# Rows are selected with which(), so a reporter with an NA recruitment is
# skipped rather than aborting an `if`.
ph2.reporters <- local({
  reporters <- ph2.reporters
  in.zone.i <- reporters$location.id == "nakawa.mbuya ii.zone i"
  arm.zone.id <- c(random = 695, nomination = 607)

  # direct / indirect are (re)read by location.id, i.e. from the first row
  # carrying the zone name, for both arms -- exactly as before.
  # NOTE(review): the probabilities are read by zone.id but direct/indirect by
  # name; confirm both rows carry the same direct/indirect values.
  by.name <- resp.exposure.prob[which(resp.exposure.prob$location.id == "nakawa.mbuya ii.zone i")[1], ]

  for (arm in names(arm.zone.id)) {
    rows <- which(in.zone.i & reporters$recruitment == arm)
    by.id <- resp.exposure.prob[which(resp.exposure.prob$zone.id == arm.zone.id[[arm]])[1], ]
    for (cell in c("d0i0", "d1i0", "d0i1", "d1i1")) {
      reporters[[paste0(cell, ".prob")]][rows] <- by.id[[cell]] / 100000
    }
    reporters$direct[rows] <- by.name[["direct"]]
    reporters$indirect[rows] <- by.name[["indirect"]]
  }

  # Sum of the two indirect == 1 cells (d0i1 and d1i1).
  reporters$indirect.prob <- reporters$d0i1.prob + reporters$d1i1.prob
  reporters
})

#Checking assignment between files
table(ph2.reporters$Responsiveness, ph2.reporters$direct)
see <- subset(ph2.reporters, Responsiveness == 1 & direct == 0)
#All discrepancies in responsiveness treatment are for Kakato II (treatment error), see email "central.bukesa.kakato ii" on treatment error

# Exposure label, e.g. "d1i0" for direct = 1 and indirect = 0.
ph2.reporters$exposure <- paste0("d", ph2.reporters$direct, "i", ph2.reporters$indirect)

# prob.weight is the d*i*.prob column matching the reporter's own
# (direct, indirect) cell. It is written as one NA-propagating ifelse() chain:
# the former masked assignments stop with "NAs are not allowed in subscripted
# assignments" as soon as direct or indirect is NA for a reporter. Such
# reporters, and any value other than 0/1, now get an NA weight.
ph2.reporters$prob.weight <- with(
  ph2.reporters,
  ifelse(direct == 1 & indirect == 1, d1i1.prob,
         ifelse(direct == 1 & indirect == 0, d1i0.prob,
                ifelse(direct == 0 & indirect == 1, d0i1.prob,
                       ifelse(direct == 0 & indirect == 0, d0i0.prob, NA))))
)


###Adding responses to individual questions
# q1.response ... q15.response: TRUE when the reporter (matched on Subject.ID)
# has at least one report with Data.Usable.mb == "Yes" for that question number.
# Each question is tabulated once instead of running 15 subset() scans of ph2
# per reporter; a reporter with an NA Subject.ID gets FALSE, as before.
ph2.reporters <- local({
  reporters <- ph2.reporters
  ids <- reporters$Subject.ID
  keys <- unique(ids)
  usable <- ph2[which(ph2$Data.Usable.mb == "Yes"), ]

  for (q in 1:15) {
    answered <- usable$Subject.ID[which(usable$Question.Number == q)]
    per.key <- tabulate(match(answered, keys, incomparables = NA),
                        nbins = length(keys))
    reporters[[paste0("q", q, ".response")]] <- per.key[match(ids, keys)] > 0
  }
  reporters
})

# q1.responses.cleaned ... q15.responses.cleaned: number of reports with
# Data.Usable.mb == "Yes" per reporter (matched on Subject.ID) and question
# number. Each question is tabulated once instead of running 15 subset() scans
# of ph2 per reporter; a reporter with an NA Subject.ID gets 0, as before.
ph2.reporters <- local({
  reporters <- ph2.reporters
  ids <- reporters$Subject.ID
  keys <- unique(ids)
  usable <- ph2[which(ph2$Data.Usable.mb == "Yes"), ]

  for (q in 1:15) {
    answered <- usable$Subject.ID[which(usable$Question.Number == q)]
    per.key <- tabulate(match(answered, keys, incomparables = NA),
                        nbins = length(keys))
    reporters[[paste0("q", q, ".responses.cleaned")]] <- per.key[match(ids, keys)]
  }
  reporters
})

# q1.responses.all ... q15.responses.all: number of reports per reporter
# (matched on Subject.ID) and question number, without the Data.Usable.mb
# filter. Each question is tabulated once instead of running 15 subset() scans
# of ph2 per reporter; a reporter with an NA Subject.ID gets 0, as before.
ph2.reporters <- local({
  reporters <- ph2.reporters
  ids <- reporters$Subject.ID
  keys <- unique(ids)

  for (q in 1:15) {
    sent <- ph2$Subject.ID[which(ph2$Question.Number == q)]
    per.key <- tabulate(match(sent, keys, incomparables = NA),
                        nbins = length(keys))
    reporters[[paste0("q", q, ".responses.all")]] <- per.key[match(ids, keys)]
  }
  reporters
})

#Indicator Variable for Recruitment Phase
# 1 when Recruitment.Phase equals 2, 0 otherwise; NA stays NA.
ph2.reporters$Recruited.Phase2 <- as.numeric(ph2.reporters$Recruitment.Phase == 2)